
# generate appendix D1, figures D1 to D14 and table 1

library(rio) # to load data
library(tidyverse) # to reshape data
library(forecast) # to run time series diagnostics
library(urca) # to run time series diagnostics
library(fixest) # to estimate a fixed effects ols

# set your working directory
setwd("~/replication_files")

# to test for stationarity, need to use complete observations
ts_data <- import("data/annual_data.csv") %>%
  select(log_ratio_multilateral_bonds,
         log_ratio_bilateral_bonds,
         log_ratio_commercial_banks_bonds,
         terms_of_trade,log_oil_production,resource_rents_perc,iso3c,year) %>%
  drop_na()

# confidence intervals
df_ci <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(ci = qnorm((1 + 0.95)/2)/sqrt(n()))


# figure D1
df_acf1 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(log_ratio_multilateral_bonds, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf1, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD1.pdf", height = 8, width = 8)

# figure D2
df_pacf2 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(log_ratio_multilateral_bonds, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>% 
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf2, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD2.pdf", height = 8, width = 8)

# figure D3
df_acf3 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(log_ratio_bilateral_bonds, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf3, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD3.pdf", height = 8, width = 8)

# figure D4
df_pacf4 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(log_ratio_bilateral_bonds, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>% 
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf4, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD4.pdf", height = 8, width = 8)

# figure D5
df_acf5 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(log_ratio_commercial_banks_bonds, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf5, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD5.pdf", height = 8, width = 8)

# figure D6
df_pacf6 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(log_ratio_commercial_banks_bonds, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>% 
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf6, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD6.pdf", height = 8, width = 8)

# figure D7
df_acf7 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(resource_rents_perc, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf7, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD7.pdf", height = 8, width = 8)

# figure D8
df_pacf8 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(resource_rents_perc, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>% 
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf8, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD8.pdf", height = 8, width = 8)

# figure D9
df_acf9 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(log_oil_production, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf9, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD9.pdf", height = 8, width = 8)

# figure D10
df_pacf10 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(log_oil_production, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>% 
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf10, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD10.pdf", height = 8, width = 8)

# figure D11
df_acf11 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(terms_of_trade, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf11, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD11.pdf", height = 8, width = 8)

# figure D12
df_pacf12 <- ts_data %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(terms_of_trade, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>% 
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf12, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() 

ggsave("figures/figureD12.pdf", height = 8, width = 8)




## bonus: test for stationarity, by variable and by country
# for brevity, only do this with the first dv

# dv: log (multilateral/bonds)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "ARG"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean 
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "BOL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "BRA"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0) with drift
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "COL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "CRI"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0) 
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "DOM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "ECU"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "GTM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "GUY"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "HND"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "JAM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,2,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "MEX"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0) 
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "PAN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "PER"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "SLV"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_ratio_multilateral_bonds[ts_data$iso3c == "VEN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,1,0)

# iv: resource rents
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "ARG"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "BOL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "BRA"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "COL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean 
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "CRI"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(2,0,0) with non-zero mean 
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "DOM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,1)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "ECU"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "GTM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "GUY"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "HND"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,1)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "JAM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean 
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "MEX"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean 
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "PAN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,0) with non-zero mean 
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "PER"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "SLV"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean 
auto.arima(ts_data$resource_rents_perc[ts_data$iso3c == "VEN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,1) with non-zero mean 

# iv: ln oil and gas production
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "ARG"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "BOL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "BRA"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0) with drift
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "COL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,1,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "CRI"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(2,0,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "DOM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # no production
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "ECU"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "GTM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "GUY"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,2) with zero mean
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "HND"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,1) with non-zero mean 
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "JAM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "MEX"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,2,0)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "PAN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with zero mean
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "PER"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,1)
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "SLV"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,1) with zero mean
auto.arima(ts_data$log_oil_production[ts_data$iso3c == "VEN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)

# iv: commodity price index
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "ARG"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "BOL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "BRA"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "COL"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "CRI"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean 
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "DOM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,1) with non-zero mean
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "ECU"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "GTM"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(2,0,0) with non-zero mean
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "GUY"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,1) with non-zero mean
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "HND"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,0,0) with non-zero mean
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "JAM"], 
           test=c("kpss","adf","pp"), trace=TRUE)  # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "MEX"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "PAN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "PER"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "SLV"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(1,0,0) with non-zero mean
auto.arima(ts_data$terms_of_trade[ts_data$iso3c == "VEN"], 
           test=c("kpss","adf","pp"), trace=TRUE) # ARIMA(0,1,0)


# most of the variables of interest appear to be integrated for several countries. but are they co-integrated?

# engle-granger regression approach
cointegration_model1 <- feols(log_ratio_multilateral_bonds ~ terms_of_trade + log_oil_production, 
                              data = ts_data, split = ~iso3c)

cointegration_model_df1 <- residuals(cointegration_model1) %>%
  as_tibble() %>%
  pivot_longer(cols = 1:16, names_to = "iso3c", values_to = "resid") %>%
  drop_na() %>%
  mutate(iso3c = case_when(
    iso3c == "V1" ~ "ARG",
    iso3c == "V2" ~ "BOL",
    iso3c == "V3" ~ "BRA",
    iso3c == "V4" ~ "COL",
    iso3c == "V5" ~ "CRI",
    iso3c == "V6" ~ "DOM",
    iso3c == "V7" ~ "ECU",
    iso3c == "V8" ~ "GTM",
    iso3c == "V9" ~ "GUY",
    iso3c == "V10" ~ "HND",
    iso3c == "V11" ~ "JAM",
    iso3c == "V12" ~ "MEX",
    iso3c == "V13" ~ "PAN",
    iso3c == "V14" ~ "PER",
    iso3c == "V15" ~ "SLV",
    iso3c == "V16" ~ "VEN"
  ))

# figure 13
df_acf13 <- cointegration_model_df1 %>% 
  group_by(iso3c) %>% 
  summarise(list_acf = list(acf(resid, plot = FALSE))) %>%
  mutate(acf_vals = purrr::map(list_acf, ~as.numeric(.x$acf))) %>% 
  select(-list_acf) %>% 
  unnest(cols = c(acf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_acf13, aes(x = lag, y = acf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="ACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() + xlim(0,NA)

ggsave("figures/figureD13.pdf", height = 8, width = 8)

# figure 14
df_pacf14 <- cointegration_model_df1 %>% 
  group_by(iso3c) %>% 
  summarise(list_pacf = list(pacf(resid, plot = FALSE))) %>%
  mutate(pacf_vals = purrr::map(list_pacf, ~as.numeric(.x$acf))) %>% 
  select(-list_pacf) %>%
  unnest(cols = c(pacf_vals)) %>% 
  group_by(iso3c) %>% 
  mutate(lag = row_number() - 1)

ggplot(df_pacf14, aes(x = lag, y = pacf_vals)) +
  geom_bar(stat = "identity", width=.3) +
  geom_hline(yintercept = 0) +
  geom_hline(data = df_ci, aes(yintercept = -ci), color = "blue", linetype="dotted") +
  geom_hline(data = df_ci, aes(yintercept = ci), color = "blue", linetype="dotted") +
  labs(x="Lag", y ="PACF") +
  facet_wrap(~iso3c, ncol = 5) + theme_classic() + xlim(0,NA)

ggsave("figures/figureD14.pdf", height = 10, width = 8)


# johansen's var approach
# table D1
ts_data %>%
  filter(iso3c=="ARG") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary()

ts_data %>%
  filter(iso3c=="BOL") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary()

ts_data %>%
  filter(iso3c=="BRA") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary()

ts_data %>%
  filter(iso3c=="COL") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary()

ts_data %>%
  filter(iso3c=="CRI") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary()

ts_data %>%
  filter(iso3c=="DOM") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>% 
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary() # note that this doesn't work because the dominican republic doesn't produce oil

ts_data %>%
  filter(iso3c=="ECU") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun")  %>%
  summary()

ts_data %>%
  filter(iso3c=="GTM") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary()

ts_data %>%
  filter(iso3c=="GUY") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>% 
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>%
  summary() # note that this doesn't work because guyana didn't produce oil until 2020

ts_data %>%
  filter(iso3c=="HND") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary() # insufficient degrees of freedom

ts_data %>%
  filter(iso3c=="JAM") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary()

ts_data %>%
  filter(iso3c=="MEX") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary()

ts_data %>%
  filter(iso3c=="PAN") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary()

ts_data %>%
  filter(iso3c=="PER") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary()

ts_data %>%
  filter(iso3c=="SLV") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary()

ts_data %>%
  filter(iso3c=="VEN") %>%
  select(log_ratio_multilateral_bonds,terms_of_trade,log_oil_production) %>%
  drop_na() %>%
  ca.jo(type = "trace", K = 2, ecdet = "none", spec = "longrun") %>% 
  summary()
